2020 excess mortality & voting patterns in CH
Ancillary data preparation
Population
Data extracted from px-x-0103010000_201:
Ständige und nichtständige Wohnbevölkerung nach institutionellen Gliederungen, Staatsangehörigkeit (Kategorie), Geburtsort, Geschlecht und Altersklasse
Important info from footnotes:
Letzte Änderungen: Neuer Datensatz (Jahr 2020) Stand der Datenbank: Juni 2021 Stichtag: 31. Dezember Raumbezug: Gemeinden / 18.10.2020 Datenquelle: Statistik der Bevölkerung und der Haushalte STATPOP Definition der ständigen Wohnbevölkerung
Community codes
Community mutations data after 2021-01-01 from BfS are
integrated into the data and pops are recalculated using these new codes
to match mortality.
histcomm <- read_rds("data/BfS/histcomm.Rds") %>%
filter(datum_der_aufnahme >= ymd("2021-01-01")) %>%
select(bfs_gde_num_old, bfs_gde_num_new, gemeindename_new)Further corrections of municipalities mutations are needed to bring
the data to the state of 2022-01-01.
Data preps
Data aggregated to age groups as in deaths dataset.
pop <- read_xlsx("data-raw/BfS/px-x-0103010000_201.xlsx",
col_types = c("numeric",
"skip", "numeric", "text", "skip",
"skip", "skip", "text", "skip", "skip",
"skip", "text", "numeric", "numeric",
"numeric", "numeric", "numeric",
"numeric", "numeric", "numeric",
"numeric", "numeric", "numeric",
"numeric", "numeric", "numeric",
"numeric", "numeric", "numeric",
"numeric", "numeric", "numeric",
"numeric"),
skip = 1) %>%
remove_empty(c("rows", "cols")) %>% clean_names() %>%
rename(year = x1,
GMDNR = x2,
GMDNAME = x3) %>%
mutate(sex = if_else(x5 == "Frau", "Female", "Male"),
nationality = if_else(x4 == "Schweiz", "Swiss", "Foreigner"),
year = as.integer(year)) %>%
select(-x4, -x5) %>%
relocate(sex, .after = GMDNAME) %>%
relocate(nationality, .after = sex) %>%
mutate(GMDNAME = word(GMDNAME, 2, -1)) %>%
fill(year, GMDNR, GMDNAME, nationality) %>%
mutate(`<40` = x0_4_jahre + x5_9_jahre + x10_14_jahre + x15_19_jahre +
x20_24_jahre + x25_29_jahre + x30_34_jahre + x35_39_jahre) %>%
select(-x0_4_jahre, -x5_9_jahre, -x10_14_jahre, -x15_19_jahre,
-x20_24_jahre, -x25_29_jahre, -x30_34_jahre, -x35_39_jahre) %>%
mutate(`40-49` = x40_44_jahre + x45_49_jahre) %>%
mutate(`50-59` = x50_54_jahre + x55_59_jahre) %>%
mutate(`60-69` = x60_64_jahre + x65_69_jahre) %>%
mutate(`70-79` = x70_74_jahre + x75_79_jahre) %>%
select(-x40_44_jahre, -x45_49_jahre, -x50_54_jahre, -x55_59_jahre,
-x60_64_jahre, -x65_69_jahre, -x70_74_jahre, -x75_79_jahre) %>%
mutate(`80+` = x80_84_jahre +x85_89_jahre + x90_94_jahre +
x95_99_jahre + x100_jahre_und_mehr) %>%
select(-x80_84_jahre, -x85_89_jahre, -x90_94_jahre,
-x95_99_jahre, -x100_jahre_und_mehr) %>%
pivot_longer(cols = `<40`:`80+`,
names_to = "age", values_to = "pop") %>%
# BfS identified changes
left_join(histcomm, by = c("GMDNR" = "bfs_gde_num_old")) %>%
mutate(GMDNR = if_else(!is.na(bfs_gde_num_new), bfs_gde_num_new, GMDNR),
GMDNAME = if_else(!is.na(gemeindename_new), gemeindename_new, GMDNAME)) %>%
select(-bfs_gde_num_new, -gemeindename_new) %>%
# Essertes merge
mutate(GMDNR = if_else(GMDNAME == "Essertes", 5805, GMDNR)) %>%
mutate(GMDNAME = if_else(GMDNAME == "Essertes", "Oron", GMDNAME)) %>%
# now to reach the state of `2022-01-01`
mutate(GMDNR = if_else(GMDNAME == "Bad Zurzach", 4324, GMDNR)) %>%
mutate(GMDNAME = if_else(GMDNAME == "Bad Zurzach", "Zurzach", GMDNAME)) %>%
mutate(GMDNR = if_else(GMDNAME == "Baldingen", 4324, GMDNR)) %>%
mutate(GMDNAME = if_else(GMDNAME == "Baldingen", "Zurzach", GMDNAME)) %>%
mutate(GMDNR = if_else(GMDNAME == "Böbikon", 4324, GMDNR)) %>%
mutate(GMDNAME = if_else(GMDNAME == "Böbikon", "Zurzach", GMDNAME)) %>%
mutate(GMDNR = if_else(GMDNAME == "Kaiserstuhl", 4324, GMDNR)) %>%
mutate(GMDNAME = if_else(GMDNAME == "Kaiserstuhl", "Zurzach", GMDNAME)) %>%
mutate(GMDNR = if_else(GMDNAME == "Rekingen (AG)", 4324, GMDNR)) %>%
mutate(GMDNAME = if_else(GMDNAME == "Rekingen (AG)", "Zurzach", GMDNAME)) %>%
mutate(GMDNR = if_else(GMDNAME == "Rietheim", 4324, GMDNR)) %>%
mutate(GMDNAME = if_else(GMDNAME == "Rietheim", "Zurzach", GMDNAME)) %>%
mutate(GMDNR = if_else(GMDNAME == "Rümikon", 4324, GMDNR)) %>%
mutate(GMDNAME = if_else(GMDNAME == "Rümikon", "Zurzach", GMDNAME)) %>%
mutate(GMDNR = if_else(GMDNAME == "Wislikofen", 4324, GMDNR)) %>%
mutate(GMDNAME = if_else(GMDNAME == "Wislikofen", "Zurzach", GMDNAME)) %>%
# Böztal merge
mutate(GMDNR = if_else(GMDNAME == "Bözen", 4185, GMDNR)) %>%
mutate(GMDNAME = if_else(GMDNAME == "Bözen", "Böztal", GMDNAME)) %>%
mutate(GMDNR = if_else(GMDNAME == "Elfingen", 4185, GMDNR)) %>%
mutate(GMDNAME = if_else(GMDNAME == "Elfingen", "Böztal", GMDNAME)) %>%
mutate(GMDNR = if_else(GMDNAME == "Effingen", 4185, GMDNR)) %>%
mutate(GMDNAME = if_else(GMDNAME == "Effingen", "Böztal", GMDNAME)) %>%
mutate(GMDNR = if_else(GMDNAME == "Hornussen", 4185, GMDNR)) %>%
mutate(GMDNAME = if_else(GMDNAME == "Hornussen", "Böztal", GMDNAME)) %>%
# Blonay - Saint-Légier merge
mutate(GMDNR = if_else(GMDNAME == "Blonay", 5892, GMDNR)) %>%
mutate(GMDNAME = if_else(GMDNAME == "Blonay", "Blonay - Saint-Légier", GMDNAME)) %>%
mutate(GMDNR = if_else(GMDNAME == "Saint-Légier-La Chiésaz", 5892, GMDNR)) %>%
mutate(GMDNAME = if_else(GMDNAME == "Saint-Légier-La Chiésaz", "Blonay - Saint-Légier", GMDNAME)) %>%
# Murten merge
mutate(GMDNR = if_else(GMDNAME == "Galmiz", 2275, GMDNR)) %>%
mutate(GMDNAME = if_else(GMDNAME == "Galmiz", "Murten", GMDNAME)) %>%
mutate(GMDNR = if_else(GMDNAME == "Gempenach", 2275, GMDNR)) %>%
mutate(GMDNAME = if_else(GMDNAME == "Gempenach", "Murten", GMDNAME)) %>%
mutate(GMDNR = if_else(GMDNAME == "Clavaleyres", 2275, GMDNR)) %>%
mutate(GMDNAME = if_else(GMDNAME == "Clavaleyres", "Murten", GMDNAME)) %>%
# Schwende-Rüte merge
mutate(GMDNAME = if_else(GMDNAME == "Schwende", "Schwende-Rüte", GMDNAME)) %>%
mutate(GMDNAME = if_else(GMDNAME == "Rüte", "Schwende-Rüte", GMDNAME)) %>%
mutate(GMDNR = if_else(GMDNAME == "Schwende-Rüte", 3112, GMDNR)) %>%
# Val Mara merge
mutate(GMDNAME = if_else(GMDNAME == "Melano", "Val Mara", GMDNAME)) %>%
mutate(GMDNAME = if_else(GMDNAME == "Maroggia", "Val Mara", GMDNAME)) %>%
mutate(GMDNAME = if_else(GMDNAME == "Rovio", "Val Mara", GMDNAME)) %>%
mutate(GMDNR = if_else(GMDNAME == "Val Mara", 5240, GMDNR)) %>%
group_by(year, GMDNR, GMDNAME, sex, age) %>%
summarise(pop = as.integer(sum(pop))) %>%
ungroup() %>%
arrange(year, GMDNR, sex, age)Yearly totals (on 31st Dec!):
# A tibble: 7 × 2
year pop
<int> <chr>
1 2014 8,237,666
2 2015 8,327,126
3 2016 8,419,550
4 2017 8,484,130
5 2018 8,544,527
6 2019 8,606,033
7 2020 8,670,300
Age (40 plus!) distributions over years
Predicting 2020 population
For each age, sex, nationality stratum, for each municipality separately, Poisson model was fitted using the 2014-2019 data; then prediction was made for 2020 & 2021 years. The gist of this operation was to exclude the effect of pandemic from the pop counts in years affected.
pop_ext_poi <- pop %>%
filter(year != 2020) %>%
# filter(GMDNR == 261 | GMDNR == 389) %>%
group_by(GMDNR, age, sex) %>%
do(glm(pop ~ year, data = ., family = "poisson") %>%
predict(., newdata = tibble(year = as.integer(2020)), type = "response") %>%
tibble(year = as.integer(2020), pop_ext_poi = .)) %>%
ungroup() %>%
mutate(pop_ext_poi = as.integer(round(pop_ext_poi))) %>%
arrange(year, GMDNR, sex, age) %>%
relocate(year)Summary of values:
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.0 42.0 109.0 338.5 283.0 117341.0 2
Note: 2 NAs!
# A tibble: 7 × 6
year GMDNR GMDNAME sex age pop
<int> <dbl> <chr> <chr> <chr> <int>
1 2014 4232 Geltwil Male 80+ 0
2 2015 4232 Geltwil Male 80+ 0
3 2016 4232 Geltwil Male 80+ 0
4 2017 4232 Geltwil Male 80+ 0
5 2018 4232 Geltwil Male 80+ 0
6 2019 4232 Geltwil Male 80+ 1
7 2020 4232 Geltwil Male 80+ 3
# A tibble: 1 × 5
year GMDNR age sex pop_ext_poi
<int> <dbl> <chr> <chr> <int>
1 2020 4232 80+ Male NA
# A tibble: 7 × 6
year GMDNR GMDNAME sex age pop
<int> <dbl> <chr> <chr> <chr> <int>
1 2014 389 Meienried Male 80+ 0
2 2015 389 Meienried Male 80+ 0
3 2016 389 Meienried Male 80+ 0
4 2017 389 Meienried Male 80+ 0
5 2018 389 Meienried Male 80+ 0
6 2019 389 Meienried Male 80+ 1
7 2020 389 Meienried Male 80+ 1
# A tibble: 1 × 5
year GMDNR age sex pop_ext_poi
<int> <dbl> <chr> <chr> <int>
1 2020 389 80+ Male NA
Results in large municipality
Results for large municipality are fine. Predictions are in red.
For small municipality we might end up with sth more wiggly.
Note: lack of prediction for 2020 & 2021 for oldest males!
There are two municipalities with missing predictions for oldest males:
# A tibble: 2 × 4
GMDNR age sex GMDNAME
<dbl> <chr> <chr> <chr>
1 389 80+ Male Meienried
2 4232 80+ Male Geltwil
Population in this strata was replaced by estimates of a simple linear model.
pop_ext_lm <- pop %>%
filter(year != 2020) %>%
group_by(GMDNR, age, sex) %>%
do(lm(pop ~ year, data = .) %>%
predict(., newdata = tibble(year = as.integer(2020))) %>%
tibble(year = as.integer(2020), pop_ext_lm = .)) %>%
ungroup() %>%
mutate(pop_ext_lm = as.integer(round(pop_ext_lm))) %>%
arrange(year, GMDNR, sex, age) %>%
relocate(year)pop_complete <- pop %>%
# extrapolated 2020
left_join(pop_ext_poi) %>%
mutate(pop_ext_poi = if_else(year < 2020,
pop, pop_ext_poi)) %>%
# replacing 2 poi missings with lm
left_join(pop_ext_lm) %>%
mutate(pop_ext_poi = if_else(year == 2020 & is.na(pop_ext_poi),
pop_ext_lm, pop_ext_poi)) %>%
select(-pop_ext_lm) Differences in 2020 pops
Comparing estimates to real values
pop_compare <- pop %>%
filter(year == 2020) %>%
select(-year) %>%
# poi model with 2 missings
left_join(pop_ext_poi) %>%
# lm model
left_join(pop_ext_lm) %>%
mutate(pop_ext_poi = if_else(is.na(pop_ext_poi),
pop_ext_lm, pop_ext_poi)) %>%
select(-pop_ext_lm) %>%
# diffs
mutate(pop_diff = pop_ext_poi - pop,
perc_diff = (pop_diff / pop_ext_poi) * 100,
alt = pop * 100) %>%
mutate(perc_diff = if_else(pop_diff == 0, 0, perc_diff),
perc_diff = if_else(pop_ext_poi == 0, alt, perc_diff)) %>%
select(-alt)Differences
frq(pop_compare, pop_ext_poi > pop) pop_ext_poi > pop <lgl>
# total N=25740 valid N=25740 mean=0.50 sd=0.50
Value | N | Raw % | Valid % | Cum. %
----------------------------------------
FALSE | 12931 | 50.24 | 50.24 | 50.24
TRUE | 12809 | 49.76 | 49.76 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
frq(pop_compare, pop_ext_poi < pop) pop_ext_poi < pop <lgl>
# total N=25740 valid N=25740 mean=0.43 sd=0.50
Value | N | Raw % | Valid % | Cum. %
----------------------------------------
FALSE | 14670 | 56.99 | 56.99 | 56.99
TRUE | 11070 | 43.01 | 43.01 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
frq(pop_compare, pop_ext_poi == pop)pop_ext_poi == pop <lgl>
# total N=25740 valid N=25740 mean=0.07 sd=0.26
Value | N | Raw % | Valid % | Cum. %
----------------------------------------
FALSE | 23879 | 92.77 | 92.77 | 92.77
TRUE | 1861 | 7.23 | 7.23 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
summary(pop_compare$pop_diff) Min. 1st Qu. Median Mean 3rd Qu. Max.
-391.000 -4.000 0.000 1.649 5.000 2594.000
summary(pop_compare$perc_diff) Min. 1st Qu. Median Mean 3rd Qu. Max.
-400.0000 -3.9062 0.0000 -0.1833 4.7059 300.0000
Age group specific diffs
ggplot(pop_compare, aes(x = pop_diff, y = perc_diff)) +
geom_point(color = "darkorchid", alpha = 0.2) +
facet_wrap(vars(sex, age), scales = "free") +
theme_light()Municipality specific diffs
pop_compare_agg <- pop_compare %>%
group_by(GMDNR) %>%
summarise(pop = sum(pop),
pop_ext_poi = sum(pop_ext_poi)) %>%
ungroup() %>%
mutate(pop_diff = pop_ext_poi - pop,
perc_diff = (pop_diff / pop_ext_poi) * 100,
alt = pop * 100) %>%
mutate(perc_diff = if_else(pop_diff == 0, 0, perc_diff),
perc_diff = if_else(pop_ext_poi == 0, alt, perc_diff)) %>%
select(-alt)
gg_centr <- read_rds("data/BfS/gg.Rds") %>%
select(GMDNR, GMDNAME) %>%
st_centroid() %>%
left_join(pop_compare_agg)summary(pop_compare_agg$pop_diff) Min. 1st Qu. Median Mean 3rd Qu. Max.
-776.00 -12.00 7.00 19.79 34.00 5237.00
summary(pop_compare_agg$perc_diff) Min. 1st Qu. Median Mean 3rd Qu. Max.
-15.2120 -0.7426 0.5495 0.5697 1.8834 19.6078
ggplot(pop_compare_agg, aes(x = pop_diff, y = perc_diff)) +
geom_point(color = "darkorchid", alpha = 0.2) +
theme_light()Getting mid-year pops
For each age, sex, nationality stratum, for each municipality separately, simple mean of the two years of data is used to estimate the middle point. In such case we use, for instance data from 31st Dec 2014 and 31st Dec 2015 to estimate pop mid-2015.
for (i in 2014:2019){
result <- pop_complete %>%
filter(year >= i & year <= i + 1) %>%
# filter(GMDNR == 261 | GMDNR == 389) %>%
group_by(GMDNR, age, sex) %>%
summarise(pop_mid_poi = mean(pop_ext_poi)) %>%
ungroup() %>%
mutate(year = as.integer(i + 1),
pop_mid_poi = as.integer(round(pop_mid_poi))) %>%
arrange(GMDNR, sex, age) %>%
relocate(year)
if (i == 2014) {
pop_mid_poi <- result
} else {
pop_mid_poi <- bind_rows(pop_mid_poi, result)
}
}Large municipality
Again, results for large municipality are fine:
Small municipality
Again, for small municipality we might end up with sth more wiggly.
Deaths <-> pop link
Prepared pop file is merged to deaths dataset prepared in
02.Rmd.
We use file based on place of residence.
w_deaths_2015_2020_year_pop <- read_rds("data/BfS-closed/monthly_deaths/w_deaths_2015_2020_year.Rds") %>%
left_join(pop_mid_poi) Deaths > pop problem
frq(w_deaths_2015_2020_year_pop, observed > pop_mid_poi)observed > pop_mid_poi <lgl>
# total N=154440 valid N=154440 mean=0.00 sd=0.01
Value | N | Raw % | Valid % | Cum. %
-----------------------------------------
FALSE | 154420 | 99.99 | 99.99 | 99.99
TRUE | 20 | 0.01 | 0.01 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
surplus <- w_deaths_2015_2020_year_pop %>%
filter(observed > pop_mid_poi) %>%
arrange(GMDNR, year) %>%
mutate(difference = pop_mid_poi - observed)There are 20 strata from 19 communities where count of deaths is larger than count of pop. The difference is always 1 in plus over pop 0.
In all these cases pop was increased by 1 to match the count of deaths!
w_deaths_2015_2020_year_pop %<>%
mutate(pop_mid_poi = if_else(observed > pop_mid_poi, observed, pop_mid_poi))Cartogram
2020 pop counts (ignoring age structure) were used to derive cartograms.
Communities & pops
R solution 1
Using cartogram package.
# takes a while!
carto <- carto_data %>%
cartogram::cartogram_cont("pop")
write_rds(carto, "data/carto/carto.Rds")
carto_ncont <- carto_data %>%
cartogram::cartogram_ncont("pop")
write_rds(carto_ncont, "data/carto/carto_ncont.Rds")
carto_dorling <- carto_data %>%
cartogram::cartogram_dorling("pop")
write_rds(carto_dorling, "data/carto/carto_dorling.Rds")R solution 2
Using cartogramR package.
carto_gsm <- carto_data %>%
cartogramR::cartogramR(count = "pop", method = "gsm")
write_rds(carto_gsm, "data/carto/carto_gsm.Rds")ScapeToad version
carto_data %>%
st_write("data/carto/in_gg.shp", delete_dsn = TRUE)
se %>%
st_write("data/carto/in_se.shp", delete_dsn = TRUE)
kt %>%
st_write("data/carto/in_kt.shp", delete_dsn = TRUE)
# java -Xmx1g -jar "C:\Program Files\ScapeToad-v11\ScapeToad.jar"‘Classic’ cartogram
Committee Member: 1(1) 2(1) 3(1) 4(1) 5(1) 6(1) 7(1) 8(1) 9(1) 10(1)
Computing Hierarchical Clustering
Noncontinuous cartogram
Committee Member: 1(1) 2(1) 3(1) 4(1) 5(1) 6(1) 7(1) 8(1) 9(1) 10(1)
Computing Hierarchical Clustering
Swiss-SEP
sep3 <- read_rds("../SNC_Swiss-SEP2/FINAL/RDS/ssep3_user_geo.Rds") %>%
select(gisid, ssep3, ssep3_d) %>%
st_transform(crs = 2056)Using 1,527,173 n’hoods from version 3.0:
ssep3_d <integer>
# total N=1527173 valid N=1527173 mean=5.50 sd=2.87
Value | N | Raw % | Valid % | Cum. %
-----------------------------------------
1 | 152719 | 10 | 10 | 10
2 | 152720 | 10 | 10 | 20
3 | 152713 | 10 | 10 | 30
4 | 152721 | 10 | 10 | 40
5 | 152724 | 10 | 10 | 50
6 | 152708 | 10 | 10 | 60
7 | 152717 | 10 | 10 | 70
8 | 152722 | 10 | 10 | 80
9 | 152712 | 10 | 10 | 90
10 | 152717 | 10 | 10 | 100
<NA> | 0 | 0 | <NA> | <NA>
This is aggregated to the level of 2,145 communities from swisstopo.
STATPOP
statpop2020 <- read_delim("data-raw/BfS-closed/STATPOP/statpop2020_220098p.zip",
delim = ";", escape_double = FALSE,
col_types = cols(STATYEAR = col_integer(),
SEX = col_integer(),
TYPEOFRESIDENCE = col_integer(),
POPULATIONTYPE = col_integer(),
AGE = col_integer(),
CLASSAGEFIVEYEARS = col_integer(),
NATIONALITYCATEGORY = col_integer(),
MAINRESIDENCECATEGORY = col_integer(),
GEOCOORDE = col_integer(),
GEOCOORDN = col_integer(),
INDIC_EGID = col_integer(),
statdate = col_date(format = "%d/%m/%Y")),
trim_ws = TRUE) %>%
remove_empty(c("rows", "cols")) %>% clean_names()
statpop2020_agg <- statpop2020 %>%
rename(egid = federalbuildingid) %>%
filter(typeofresidence == 1) %>%
filter(populationtype == 1) %>%
filter(mainresidencecategory == 1) %>%
filter(indic_egid == 1) %>%
select(-statyear, -statdate,
-typeofresidence, -populationtype, -mainresidencecategory,
-indic_egid) %>%
group_by(egid, geocoorde, geocoordn) %>%
summarise(pop = n()) %>%
ungroup() %>%
st_as_sf(coords = c("geocoorde", "geocoordn"),
crs = 2056,
remove = TRUE)Using data from STATPOP 2020.
Dataset consists of 8,859,558 individuals and 1,549,502 buildings. We selected individuals with type of residence Hauptwohnsitz, pop type Ständige Wohnbevölkerung, main residency category Nur ein Hauptwohnsitz and with valid EGID building ID.
The nearest building with SSEP was assigned for each of these building. The point dataset was then overlaid with community boundaries and summary measures of pop based index were created and pop level SEP was aggregated by community.
’nhood based averages for communities
# # or with full version?
# st_gg <- st_read("data-raw/BfS/ag-b-00.03-875-gg/ggg_2021-LV95/shp/g1g21_01072021.shp",
# as_tibble = TRUE)
ssep3_gem <-
st_join(sep3,
st_gg,
join = st_intersects)
# tm_shape(gg) +
# tm_polygons() +
# tm_shape(ssep3_gem %>% filter(is.na(GMDNR))) +
# tm_dots()
ssep3_gem_neigh <- ssep3_gem %>%
st_drop_geometry() %>%
group_by(GMDNR) %>%
summarise(n = n(),
ssep3_median = median(ssep3)) %>%
ungroup()
ssep3_gem_neigh_geo <- inner_join(gg, ssep3_gem_neigh) %>%
filter(!is.na(ssep3_median)) %>%
mutate(median_ssep3_d = ntile(ssep3_median, 10)) %>%
mutate(median_ssep3_d = factor(median_ssep3_d,
levels = 1:10,
labels = c("1st - lowest",
"2", "3", "4",
"5th decile",
"6", "7", "8", "9",
"10th - highest")))
write_rds(ssep3_gem_neigh_geo, "data/BfS/ssep3_gem_neigh_geo.rds")Watch out for small n! Here some examples with communities below 20 n’hoods
Final map using median index - bubbles scaled to number of ’nhoods within community.
Population based averages for communities
# join to sep3
statpop2020_agg_sep <- st_join(statpop2020_agg, sep3, join = st_nearest_feature)
# # fails for unknown and unsolved reasons
# nearest <- st_nearest_feature(statpop2020_agg, sep3)
# statpop2020_agg_sep$dist3 <- st_distance(statpop2020_agg_sep, sep3[nearest, ], by_element = TRUE)
# rm(nearest, statpop2020_agg); gc()
# summary(statpop2020_agg_sep$dist3)
statpop2020_agg_sep_gem <-
st_join(statpop2020_agg_sep, st_gg, join = st_intersects) %>%
relocate(egid, gisid, pop,
ssep3, ssep3_d,
GMDNR)
write_rds(statpop2020_agg_sep_gem, "data/BfS-closed/SEP/statpop2020_agg_sep_gem.Rds")ssep3_gem_pop <- statpop2020_agg_sep_gem %>%
select(GMDNR, ssep3, pop) %>%
uncount(pop) %>%
group_by(GMDNR) %>%
summarise(pop = n(),
ssep3_median = median(ssep3)) %>%
ungroup()
# left_join(gg, ssep3_gem_pop) %>%
# filter(is.na(ssep3_median))
ssep3_gem_pop_geo <-
left_join(st_gg, ssep3_gem_pop) %>%
filter(!is.na(ssep3_median)) %>%
mutate(median_ssep3_d = ntile(ssep3_median, 10)) %>%
mutate(median_ssep3_d = factor(median_ssep3_d,
levels = 1:10,
labels = c("1st - lowest",
"2", "3", "4",
"5th decile",
"6", "7", "8", "9",
"10th - highest")))
write_rds(ssep3_gem_pop_geo, "data/BfS/ssep3_gem_pop_geo.rds")Final map using median index - bubbles scaled to number of people (STATPOP 2020)
Raumgliederungen
Data from the BfS
app. State as of 2021-07-01 to match spatial data
nicely.
raum <- read_xlsx("data-raw/BfS/Raumgliederungen.xlsx",
skip = 1) %>%
remove_empty(c("rows", "cols")) %>% clean_names() %>%
filter(! is.na(bfs_gde_nummer)) %>%
rename(GMDNR = bfs_gde_nummer,
GMDNAME = gemeindename,
KTNAME = kanton,
BZNR = bezirks_nummer,
BZNAME = bezirksname) %>%
select(GMDNR, GMDNAME, KTNAME, BZNR, BZNAME,
stadtische_landliche_gebiete, sprachgebiete,
urbanisierungsgrad_2011_degurba_eurostat) %>%
mutate(
r_urban1 = case_when(
stadtische_landliche_gebiete == 1 ~ "Urban",
stadtische_landliche_gebiete == 2 ~ "Periurban",
stadtische_landliche_gebiete == 3 ~ "Rural",
TRUE ~ ""),
r_urban2 = case_when(
urbanisierungsgrad_2011_degurba_eurostat == 1 ~ "Dense",
urbanisierungsgrad_2011_degurba_eurostat == 2 ~ "Medium",
urbanisierungsgrad_2011_degurba_eurostat == 3 ~ "Low",
TRUE ~ ""),
r_lang = case_when(
sprachgebiete == 1 ~ "German",
sprachgebiete == 2 ~ "French",
sprachgebiete == 3 ~ "Italian",
sprachgebiete == 4 ~ "Romansh",
TRUE ~ "")
) %>%
select(-stadtische_landliche_gebiete, -sprachgebiete,
-urbanisierungsgrad_2011_degurba_eurostat)Urbanization 1
r_urban1 <character>
# total N=2145 valid N=2145 mean=1.96 sd=0.69
Value | N | Raw % | Valid % | Cum. %
-------------------------------------------
Periurban | 560 | 26.11 | 26.11 | 26.11
Rural | 1110 | 51.75 | 51.75 | 77.86
Urban | 475 | 22.14 | 22.14 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
Urbanization 2
Using DEGURBA.
r_urban2 <character>
# total N=2145 valid N=2145 mean=2.38 sd=0.57
Value | N | Raw % | Valid % | Cum. %
----------------------------------------
Dense | 98 | 4.57 | 4.57 | 4.57
Low | 1136 | 52.96 | 52.96 | 57.53
Medium | 911 | 42.47 | 42.47 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
Language region
r_lang <character>
# total N=2145 valid N=2145 mean=1.78 sd=0.57
Value | N | Raw % | Valid % | Cum. %
-----------------------------------------
French | 618 | 28.81 | 28.81 | 28.81
German | 1389 | 64.76 | 64.76 | 93.57
Italian | 121 | 5.64 | 5.64 | 99.21
Romansh | 17 | 0.79 | 0.79 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
Voting
Data from swissdd package.
Linked to municipality boundaries from 2022-01-01 with
two corrections form #28.
st_gg <- read_rds("data/swisstopo/st_gg.Rds")June vote
Bundesgesetz vom 25.09.2020 über die gesetzlichen Grundlagen für Verordnungen des Bundesrates zur Bewältigung der Covid-19-Epidemie (Covid-19-Gesetz)
Data
covid_jun <- get_nationalvotes(votedates = "2021-06-13",
geolevel = "municipality") %>%
filter(str_detect(name, "Covid-19")) %>%
select(-name, -id)Dataset consists of 2,141 communities. Votes from abroad are excluded.
Results preview
Link to municipalities
Using GMDNR / mun_id for deterministic
linkage.
covid_jun_gem <- st_gg %>%
left_join(covid_jun)There are few communities that exist on the map but do not exist in the voting dataset:
# A tibble: 4 × 2
GMDNR GMDNAME
<dbl> <chr>
1 389 Meienried
2 408 Hellsau
3 535 Deisswil bei Münchenbuchsee
4 877 Niedermuhlern
November vote
Änderung des Bundesgesetzes über die gesetzlichen Grundlagen für Verordnungen des Bundesrates zur Bewältigung der Covid-19-Epidemie (Covid-19-Gesetz) (Härtefälle, Arbeitslosenversicherung, familienergänzende Kinderbetreuung, Kulturschaffende, Veranstaltungen)
Data
covid_nov <- get_nationalvotes(votedates = "2021-11-28",
geolevel = "municipality") %>%
filter(str_detect(name, "Covid-19")) %>%
select(-name, -id)Dataset consists of 2,141 communities. Votes from abroad are excluded.
Results preview
Link to municipalities
Using GMDNR / mun_id for deterministic
linkage.
covid_nov_gem <- st_gg %>%
left_join(covid_nov)There are again few communities that exist on the map but do not exist in the voting dataset:
# A tibble: 4 × 2
GMDNR GMDNAME
<dbl> <chr>
1 389 Meienried
2 408 Hellsau
3 535 Deisswil bei Münchenbuchsee
4 877 Niedermuhlern
Mobility & restrictions
Google community mobility reports from
COVID19 package.
gmr <- covid19(country = "CHE", level = 2,
start = "2020-01-01",
end = "2020-12-31",
gmr = TRUE, verbose = FALSE) %>%
select(-administrative_area_level,
-administrative_area_level_1,
-administrative_area_level_2,
-administrative_area_level_3,
-iso_numeric, -iso_currency,
- iso_alpha_2, -iso_alpha_3,
- latitude, -longitude, -population,
- key_google_mobility, -key_apple_mobility, -key_jhu_csse, -key_nuts, -key_gadm,
-(confirmed:vent)) %>%
rename(KTNAME = key_local) %>%
relocate(id, date, KTNAME)Data structure for example of 2020 data:
| Name | gmr |
| Number of rows | 8135 |
| Number of columns | 27 |
| _______________________ | |
| Column type frequency: | |
| character | 2 |
| Date | 1 |
| numeric | 24 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| id | 0 | 1 | 8 | 8 | 0 | 26 | 0 |
| KTNAME | 0 | 1 | 2 | 2 | 0 | 26 | 0 |
Variable type: Date
| skim_variable | n_missing | complete_rate | min | max | median | n_unique |
|---|---|---|---|---|---|---|
| date | 0 | 1 | 2020-02-01 | 2020-12-31 | 2020-07-28 | 335 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| school_closing | 0 | 1.00 | 0.92 | 1.24 | 0.00 | 0.00 | 0.0 | 2.00 | 3 | ▇▁▁▂▂ |
| workplace_closing | 0 | 1.00 | 0.11 | 2.03 | -3.00 | -2.00 | 0.0 | 2.00 | 3 | ▇▂▂▁▇ |
| cancel_events | 0 | 1.00 | 1.23 | 1.43 | -2.00 | 2.00 | 2.0 | 2.00 | 2 | ▂▁▁▁▇ |
| gatherings_restrictions | 0 | 1.00 | 0.95 | 3.21 | -3.00 | -3.00 | 3.0 | 4.00 | 4 | ▆▁▂▁▇ |
| transport_closing | 0 | 1.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.0 | 0.00 | 0 | ▁▁▇▁▁ |
| stay_home_restrictions | 0 | 1.00 | 0.55 | 0.50 | 0.00 | 0.00 | 1.0 | 1.00 | 1 | ▆▁▁▁▇ |
| internal_movement_restrictions | 0 | 1.00 | 0.26 | 0.44 | 0.00 | 0.00 | 0.0 | 1.00 | 1 | ▇▁▁▁▃ |
| international_movement_restrictions | 0 | 1.00 | 2.79 | 0.73 | 0.00 | 3.00 | 3.0 | 3.00 | 3 | ▁▁▁▁▇ |
| information_campaigns | 0 | 1.00 | 1.96 | 0.25 | 0.00 | 2.00 | 2.0 | 2.00 | 2 | ▁▁▁▁▇ |
| testing_policy | 0 | 1.00 | 2.10 | 0.72 | 0.00 | 2.00 | 2.0 | 3.00 | 3 | ▁▃▁▇▆ |
| contact_tracing | 0 | 1.00 | 1.75 | 0.44 | 0.00 | 2.00 | 2.0 | 2.00 | 2 | ▁▁▂▁▇ |
| facial_coverings | 0 | 1.00 | 1.08 | 1.74 | -3.00 | 0.00 | 1.0 | 2.00 | 3 | ▂▁▃▃▇ |
| vaccination_policy | 0 | 1.00 | 0.03 | 0.17 | 0.00 | 0.00 | 0.0 | 0.00 | 1 | ▇▁▁▁▁ |
| elderly_people_protection | 0 | 1.00 | 1.32 | 1.88 | -3.00 | 2.00 | 2.0 | 2.00 | 3 | ▂▁▁▁▇ |
| government_response_index | 0 | 1.00 | -49.55 | 10.42 | -60.42 | -57.81 | -48.7 | -45.05 | 0 | ▇▅▁▁▁ |
| stringency_index | 0 | 1.00 | -49.46 | 15.17 | -73.15 | -59.26 | -46.3 | -39.35 | 0 | ▆▅▇▂▁ |
| containment_health_index | 0 | 1.00 | -51.14 | 10.07 | -63.33 | -60.12 | -50.3 | -47.92 | 0 | ▇▇▁▁▁ |
| economic_support_index | 0 | 1.00 | -38.39 | 18.15 | -62.50 | -37.50 | -37.5 | -37.50 | 0 | ▃▇▁▁▂ |
| retail_and_recreation_percent_change_from_baseline | 1960 | 0.76 | -25.80 | 26.68 | -93.00 | -42.00 | -17.0 | -6.00 | 56 | ▂▂▇▂▁ |
| grocery_and_pharmacy_percent_change_from_baseline | 2499 | 0.69 | -2.42 | 19.52 | -95.00 | -7.00 | 0.0 | 6.00 | 85 | ▁▁▇▁▁ |
| parks_percent_change_from_baseline | 4338 | 0.47 | 25.63 | 58.22 | -77.00 | -17.00 | 12.0 | 58.00 | 325 | ▇▇▂▁▁ |
| transit_stations_percent_change_from_baseline | 608 | 0.93 | -21.20 | 21.43 | -84.00 | -35.00 | -21.0 | -8.00 | 78 | ▂▇▇▁▁ |
| workplaces_percent_change_from_baseline | 608 | 0.93 | -23.11 | 17.14 | -90.00 | -32.00 | -20.0 | -12.00 | 17 | ▁▁▅▇▂ |
| residential_percent_change_from_baseline | 2650 | 0.67 | 7.95 | 6.99 | -6.00 | 3.00 | 7.0 | 11.00 | 37 | ▃▇▂▁▁ |
Note: description of variables is described here.
Example of ‘workplace % change from baseline’ data for canton BE
Apple
Apple’s (discontinued) Mobility Trend Reports
from covmobility package.
data(apple_mobility)
apple_mobility = apple_mobility %>%
filter(country == "Switzerland") %>%
filter(date <= ymd("2020-12-31")) %>%
filter(geo_type == "sub-region") %>%
select(-geo_type, -country, - alternative_name, -sub_region)Simpler dataset with a 0-Inf score for each day and canton across three trasport modes.
transportation_type <character>
# total N=16284 valid N=16284 mean=1.74 sd=0.90
Value | N | Raw % | Valid % | Cum. %
-----------------------------------------
driving | 9204 | 56.52 | 56.52 | 56.52
transit | 2124 | 13.04 | 13.04 | 69.57
walking | 4956 | 30.43 | 30.43 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
10.76 84.98 106.12 112.82 134.93 621.56 756
Example of data for canton BE
Note: Appenzell Innerrhoden & Uri have zero data!
Final link & save
w_deaths_2015_2020_year_fin <- w_deaths_2015_2020_year_pop %>%
left_join(ssep3_gem_pop_geo %>%
st_drop_geometry() %>%
select(-pop)) %>%
left_join(gg21_merged %>%
st_drop_geometry() %>%
select(-GMDNAME, -KTNAME, -BZNR, -BZNAME)
)
write_rds(w_deaths_2015_2020_year_fin, "data/BfS-closed/monthly_deaths/w_deaths_2015_2020_year_fin.Rds")